f3a51a
@@ -42,6 +42,8 @@
 
 import javax.ws.rs.core.UriBuilder;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -57,6 +59,8 @@
  * General utility methods.
  */
 public class TempletonUtils {
+  private static final Log LOG = LogFactory.getLog(TempletonUtils.class);
+
   /**
    * Is the object non-empty?
    */
@@ -98,6 +102,24 @@
public static boolean isset(char ch) {
   public static final Pattern PIG_COMPLETE = Pattern.compile(" \\d+% complete$");
   //looking for map = 100%,  reduce = 100%
   public static final Pattern HIVE_COMPLETE = Pattern.compile(" map = (\\d+%),\\s+reduce = (\\d+%).*$");
+  /**
+   * Hive on Tez produces a progress report that looks like this:
+   * Map 1: -/-	Reducer 2: 0/1	
+   * Map 1: -/-	Reducer 2: 0(+1)/1	
+   * Map 1: -/-	Reducer 2: 1/1
+   * 
+   * -/- means there are no tasks (yet)
+   * 0/1 means 1 total task, 0 completed
+   * 1(+2)/3 means 3 total, 1 completed and 2 running
+   * 
+   * HIVE-8495, in particular https://issues.apache.org/jira/secure/attachment/12675504/Screen%20Shot%202014-10-16%20at%209.35.26%20PM.png
+   * has more examples.
+   * To report progress, we assume all tasks are of equal size and compute "completed" as a percentage of "total".
+   * "(Map|Reducer) (\\d+:) ((-/-)|(\\d+(\\(\\+\\d+\\))?/\\d+))" is the complete pattern, but we drop the "-/-" alternative
+   * to exclude groups that add no information, such as "Map 1: -/-".
+   */
+  public static final Pattern TEZ_COMPLETE = Pattern.compile("(Map|Reducer) (\\d+:) (\\d+(\\(\\+\\d+\\))?/\\d+)");
+  public static final Pattern TEZ_COUNTERS = Pattern.compile("\\d+"); // each run of digits within one TEZ_COMPLETE match
 
   /**
    * Extract the percent complete line from Pig or Jar jobs.
@@ -115,6 +137,31 @@
public static String extractPercentComplete(String line) {
     if(hive.find()) {
       return "map " + hive.group(1) + " reduce " + hive.group(2);
     }
+    Matcher tez = TEZ_COMPLETE.matcher(line);
+    if(tez.find()) {
+      int totalTasks = 0;
+      int completedTasks = 0;
+      do {
+        //here each group looks something like "Map 2: 2/4" or "Reducer 3: 1(+2)/4"
+        //collect every number: the 1st (from "Map 2") is ignored, the 2nd is completed, the last is total; "(+2)", if there, is the 3rd of 4 and is ignored
+        Matcher counts = TEZ_COUNTERS.matcher(tez.group());
+        List<String> items = new ArrayList<String>(4);
+        while(counts.find()) {
+          items.add(counts.group());
+        }
+        completedTasks += Integer.parseInt(items.get(1));
+        if(items.size() == 3) {
+          totalTasks += Integer.parseInt(items.get(2));
+        }
+        else {
+          totalTasks += Integer.parseInt(items.get(3));
+        }
+      } while(tez.find());
+      if(totalTasks == 0) {
+        return "0% complete (0 total tasks)";
+      }
+      return completedTasks * 100 / totalTasks + "% complete";
+    }
     return null;
   }
 
